####################################
#Diff-in-diff with linear point as SD estimation (no matching) 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
####################################

dd_with_linearSDs <- function(my_data, #data frame 
                       treatment_basis_name, #character string 
                       control_basis_name, #character vector 
                       outcome_time1_name, #character string 
                       outcome_time2_name, #character string 
                       treatment_basis_treated_max, #numeric 
                       treatment_basis_control_min, #numeric 
                       control_basis_control_max, #numeric 
                       control_basis_treated_min, #numeric 
                       fixed_effect_names=NULL, 
                       conf_level=0.95, 
                       match=F, 
                       start_browser=F, 
                       type="binary",
                       my_method = "nearest", 
                       my_distance="mahalanobis", 
                       my_discard="none", 
                       my_m.order="largest", 
                       my_replace=T)
{#begin function brackets 
  require(MatchIt, quietly=T)
  
  if(start_browser==T){browser()}
  
  #internal cleaning
  my_data <- as.data.frame(
    cbind(apply(X=my_data[,which(!(colnames(my_data) %in% c("COUNTYFP10", "STATEFP10"))) ], 
                MARGIN=2, 
                FUN=as.numeric),
          data.frame(my_data$COUNTYFP10, my_data$STATEFP10) ) )
  colnames(my_data)[ncol(my_data)-1] <- "COUNTYFP10"
  colnames(my_data)[ncol(my_data)] <- "STATEFP10"
  
  #get treatment basis into memory 
  treatment_basis <- eval(parse(text=paste("my_data$", treatment_basis_name, "")))
  
  #get control basis into memory 
  control_basis <- eval(parse(text=paste("my_data$", control_basis_name, "")))
  
  #determine treated_indicator 
  my_data <- cbind(my_data, NA) 
  colnames(my_data)[ncol(my_data)] <- "treated_indicator"
  my_data$treated_indicator[treatment_basis <= treatment_basis_treated_max & 
                              control_basis >= control_basis_treated_min] <- 1
  my_data$treated_indicator[control_basis <= control_basis_control_max & 
                              treatment_basis >= treatment_basis_control_min] <- 0
  
  my_data <- my_data[is.na(my_data$treated_indicator)==F,]
  #keep_columns <- colnames(my_data) %in% c(outcome_time1_name, outcome_time2_name, "treated_indicator")
  keep_columns <- colnames(my_data) %in% c(outcome_time1_name, outcome_time2_name, "treated_indicator", 
                                           treatment_basis_name, control_basis_name, fixed_effect_names)
  my_data <- my_data[,c(which(keep_columns), which(colnames(my_data) %in% matching_variables))]
  
  #prepare for the linear regression 
  if(match==T)
  { 
    #internally remove rows with NAs from the dataset. 
    my_data <- na.omit(my_data)
    
    matching_formula <- paste("treated_indicator ~", paste(matching_variables, collapse="+"))
    if(my_method=="genetic")
    {  
      my_match <- matchit(as.formula(matching_formula), data = my_data, 
                                 method = my_method, 
                                 distance= my_distance, 
                                 discard= my_discard, 
                                 #m.order=my_m.order, 
                                 replace=my_replace)
    }
    
    if(my_method!="genetic")
    { 
      my_match <- matchit(as.formula(matching_formula), data = my_data, 
                                 method = my_method, 
                                 distance= my_distance, 
                                 discard= my_discard, 
                                 m.order=my_m.order, 
                                 replace=my_replace)
    }  
      my_data <- match.data(my_match) 
  }
  
  ##prep for regression 
  prep1 <- cbind(my_data, 0, my_data[outcome_time1_name])
  prep2 <- cbind(my_data, 1, my_data[outcome_time2_name])
  colnames(prep1)[ncol(prep1)-1] <- "post_indicator"; colnames(prep1)[ncol(prep1)] <- "regression_outcome"
  colnames(prep2)[ncol(prep2)-1] <- "post_indicator"; colnames(prep2)[ncol(prep2)] <- "regression_outcome"
  
  prep_reg <- rbind(prep1, prep2)
  
  ##diff-in-diff estimation
  if(type=="binary")
  {
    lm_summary <- summary(lm(prep_reg[,"regression_outcome"] ~ prep_reg[,"treated_indicator"] + 
                             prep_reg[,"post_indicator"] +  prep_reg[,"treated_indicator"]:prep_reg[,"post_indicator"]))
  }
  
  if(type=="binary_controls")
  { 
    lm_summary <- summary(lm(as.formula(
                          paste("regression_outcome~treated_indicator+post_indicator+treated_indicator:post_indicator", 
                            paste(matching_variables, collapse="+"), sep="+") ), 
                              data=prep_reg))
  } 

  if(type=="binary_fixed_effects")
  { 
    browser() 
    lm_summary <- summary(lm(as.formula(
      paste("regression_outcome~treated_indicator+post_indicator+treated_indicator:post_indicator", 
            paste(fixed_effect_names, collapse="+"), sep="+") ), 
      data=prep_reg))
  } 
  
  if(type=="binary_fixed_effects_with_continuous_controls")
  { 
    lm_summary <- summary(lm(as.formula(
      paste("regression_outcome~treated_indicator+post_indicator+treated_indicator:post_indicator", 
            paste(paste(fixed_effect_names, collapse="+"), matching_variables, sep="+", collapse="+"), sep="+") ), 
      data=prep_reg))
  } 
    
  if(type=="continuous")
  { 
    lm_summary <- summary(lm(as.formula(sprintf("regression_outcome~post_indicator+ 
                                        %s:post_indicator", treatment_basis_name)), 
                                        data=prep_reg))
  }
    
  if(type=="continuous_controls")
  { 
    lm_summary <- summary(lm(as.formula(
        paste(sprintf("regression_outcome~post_indicator + %s:post_indicator", treatment_basis_name),
            paste(matching_variables, collapse="+"), sep="+") ), 
            data=prep_reg))
  }
  
  #if(type=="fixed_effects")
  #{ 
  #  gain_score <- my_data[,outcome_time2_name] - my_data[,outcome_time1_name]
  #  lm_summary <- summary( lm( gain_score ~ as.factor(my_data$STATEFP10) + 
  #                                          my_data$treated_indicator) ) # as.factor(my_data$COUNTYFP10) +
   #}

  lm_result <- lm_summary[[4]][nrow(lm_summary[[4]]),]

  #return
  return_list <- list(lm_summary=lm_result)
  return(return_list)
}#end function barkets 
